library(tidyverse)  # ggplot(), %>%, mutate(), and friends 
library(broom)
library(MatchIt)  # Match things
library(Rcpp)
library(MASS)
library(modelsummary)
library(IRdisplay)
library(haven)
library(dplyr)
library(jtools)
library(car)
library(margins)
library(marginaleffects)
library(cobalt)
library(ggplot2)

# set directory


# IMPORT MAP
map <- read_csv('C:/Users/Gian Maria/Desktop/Unitn/map/matching/dataset76_20.csv')

regions <- read_csv('C:/Users/Gian Maria/Desktop/Unitn/map/matching/NIBRS/states_region.csv')
regions <- dplyr::rename(regions, State=state_x)


# replace string for being identical acrsso map and nibrs regions
map$State[map$State == 'Rhodes Island'] <- 'Rhode Island'
map$State[map$State == 'District of Columbia'] <- 'District Of Columbia'
# adding missing states
regions <- regions %>% add_row(...1= 888, State = "Alaska", country_region = "west", country_division="Pacific")
regions <- regions %>% add_row(...1= 999, State = "California", country_region = "west", country_division="Pacific")
regions <- regions %>% add_row(...1= 45878, State = "Florida", country_region = "south", country_division="South Atlantic")
regions <- regions %>% add_row(...1= 97666, State = "New Jersey", country_region = "north east", country_division="Middle Atlantic")


# add region and division to map dataset
map<- merge(map, regions, by="State")
library(janitor)

#clean labels: can be done by simply
map <- clean_names(map)

# change to character exposure variable
map$vic_race_black <- as.character(map$vic_race_black)

# final cleaning
map <- subset(map, map$agentype != 4)
map <- subset(map, map$state != "PAPSP8")

##################### IMPORT NIBRS
nibrs <- readRDS('C:/Users/Gian Maria/Desktop/Unitn/map/matching/repo_2023/nibrs_prepared_dataset_91_20.rds')

nibrs <- clean_names(nibrs)
nibrs$race_of_victim_black <- as.character(nibrs$race_of_victim_black)
nibrs$incident_date <- as.POSIXct(nibrs$incident_date) # otherwise margins won't work

# final cleaning
nibrs$agency_indicator[nibrs$agency_indicator == '5'] <- "other agencies"
nibrs$vic_relation[is.na(nibrs$vic_relation)] = "relationship unknown"

# check whether means are statistically significant via t-test
with(map, t.test(solved_yes ~ vic_race_black))










#################################################################
####################### MAIN MODELS #############################
#################################################################



############## NIBRS (all with agency-level info and states)
m.out3ni <- matchit(race_of_victim_black ~ total_offender_segments+total_victims + 
                      sex_of_victim+
                      agecat+
                      decade+
                      agency_indicator+
                      monthly_agency_overlap + vic_relation+
                      state_x,
                    data=nibrs,
                    method = "exact",
                    estimand="ATT")

bal.tab(m.out3ni)


matched_dataset3ni <- match.data(m.out3ni)





#adjusted
model_matched3ni <- glm(solved ~
                          race_of_victim_black + total_offender_segments+total_victims + 
                          sex_of_victim+
                          agecat+
                          decade+
                          weapon+
                          circumstance+
                          agency_indicator+
                          factor(monthly_agency_overlap) +
                          state_x+
                          population_group+
                          location_type+
                          vic_relation,
                        family=quasibinomial(), data = matched_dataset3ni, weights=weights)

tidy(model_matched3ni)




# get odds ratios and 95% cis
summ(model_matched3ni, exp = TRUE, robust=TRUE, digits=3)

# get ame

ame_3ni<-comparisons(model_matched3ni, variables = "race_of_victim_black", vcov="HC3",
                     wts="weights",
                     newdata = subset(matched_dataset3ni, race_of_victim_black == 1)) |>
  summary()

ame_3ni$dataset <- 'NIBRS (1991-2020)'
ame_3ni$model <- 'Adjusted/Matched'


######### unmatched
model_unmatched3ni <- glm(solved ~
                            race_of_victim_black + total_offender_segments+total_victim_segments + 
                            sex_of_victim+
                            agecat+
                            decade+
                            weapon+
                            circumstance+
                            agency_indicator+
                            factor(monthly_agency_overlap) +
                            state_x+
                            population_group+
                            location_type+
                            vic_relation,
                          family=binomial, data = nibrs)

summ(model_unmatched3ni, exp = TRUE, robust=TRUE, digits=3)
# GENERAL ATT
ame_3ni_unmatched<-comparisons(model_unmatched3ni, variables = "race_of_victim_black",
                               newdata = subset(nibrs, race_of_victim_black == 1)) |>
  summary()
ame_3ni_unmatched$dataset <- 'NIBRS (1991-2020)'
ame_3ni_unmatched$model <- 'Adjusted/Unmatched'


###########  MAP, AFTER 1990 (SAME TIME WINDOW AS NIBRS)
map91<- filter(map, year2 > 1990) # create dataset keeping only murders occurred after 91 (included)

############## MAP (agency info + states)
m.out3r <- matchit(vic_race_black ~ vic_count + off_count + vic_sex+
                     age_cat+
                     decade+
                     agentype+
                     monthly_agency_overlap+
                     state+
                     relationship,                        
                   data=map91,
                   method = "exact",
                   estimand="ATT")



bal.tab(m.out3r)

matched_dataset3r <- match.data(m.out3r)


# modeling unadjusted and adjusted and unmatched



# adjusted
model_matched3r <- glm(solved_yes ~ vic_race_black + 
                         vic_count + off_count + vic_sex+
                         age_cat + 
                         decade+
                         weapon+
                         circumstance+
                         agentype+	 
                         factor(monthly_agency_overlap)+
                         state+
                         relationship, 
                       family=quasibinomial(), data = matched_dataset3r, weights=weights)


#tidy(model_matched3r)

summ(model_matched3r, exp = TRUE, robust=TRUE, digits=3)


# GENERAL ATT
ame_3_91<-comparisons(model_matched3r, variables = "vic_race_black", 
                      vcov="HC3", wts="weights",
                      newdata = subset(matched_dataset3r, vic_race_black == 1)) |>
  summary()

ame_3_91$dataset <- 'MAP (1991-2020)'
ame_3_91$model <- 'Adjusted/Matched'




############ non matched
model_unmatched_3r <- glm(solved_yes ~ vic_race_black + 
                            vic_count + off_count + vic_sex+
                            age_cat + 
                            decade+
                            weapon+
                            circumstance+
                            agentype+	 
                            factor(monthly_agency_overlap)+
                            state+relationship, 
                          family=binomial, data = map91)

summ(model_unmatched_3r, exp = TRUE, robust=TRUE, digits=3)

# GENERAL ATT

ame_3_91_unmatched<-comparisons(model_unmatched_3r, variables = "vic_race_black", newdata = subset(map91, vic_race_black == 1)) |>
  summary()

ame_3_91_unmatched$dataset <- 'MAP (1991-2020)'
ame_3_91_unmatched$model <- 'Adjusted/Unmatched'



############### PLOTTING
# plotting (Figure on national AMEs)

ame_list <- do.call("rbind", list(ame_3_91, ame_3_91_unmatched,
                                  ame_3ni, ame_3ni_unmatched ))
ame_list <- ame_list[order(ame_list$dataset),]
ame_list$dataset<- fct_inorder(ame_list$dataset)
ame_list$model = factor(ame_list$model, levels=c("Adjusted/Matched", "Adjusted/Unmatched"))
# plot
dodge <- position_dodge(width=0.5)
ame_plot <- ggplot(ame_list, x=factor(ame_list$dataset, levels=unique(ame_list$dataset)), y=estimate, color=model, aes(ymin=-0.06,ymax=0.06))+
  geom_point(aes(x=factor(ame_list$dataset, levels=unique(ame_list$dataset)), y=estimate,shape=model, color=model), size=2.5, position=dodge)+
  scale_y_continuous(breaks =c("-.06"=-0.06, "-.04"=-0.04,"-.02"=-0.02, "0"=0,
                               0, ".02"=0.02, ".04"=0.04, ".06"=0.06))+
  geom_errorbar(aes(x=dataset, ymin=conf.low, ymax=conf.high, group=model, color=model), width=0.1,position=dodge, name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched"))+
  scale_color_manual(values=c("Adjusted/Matched"="red","Adjusted/Unmatched"="blue"), name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched"))+
  scale_shape_manual(values=c("Adjusted/Matched"=19,"Adjusted/Unmatched"= 15), name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched" ))+
  labs(x= "Dataset", y = "Average Marginal Effect (95% C.I.)")+
  theme_bw()+
  geom_hline(aes(yintercept=0), colour="#990000", linetype="dashed")+
  coord_flip()+theme(axis.text.y = element_text(size=12)) + 
  theme(axis.text.x = element_text(size=12), axis.title=element_text(size=16))+
  #theme(text = element_text(family = "Times New Roman"))+
  theme(legend.title=element_text(size=14), legend.key.size=unit(0.5, 'cm'), legend.text = element_text(size=14))+theme(legend.position="bottom")+
  guides(colour=guide_legend(nrow=1,byrow=TRUE))

